{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gym\n",
    "import numpy as np\n",
    "import algorithms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "env = gym.make('Blackjack-v0')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<gym.envs.toy_text.blackjack.BlackjackEnv at 0x10990c090>"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "policy, Q = algorithms.mc_offpolicy(env)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Play"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Recall that every state has the following structure (m, n, o), where:\n",
    "\n",
    "__m__: current hand\n",
    "\n",
    "__n__: opponent's visible card\n",
    "\n",
    "__o__: have you got an usable ace (1 if true, 0 otherwise)\n",
    "\n",
    "And possible actions are:\n",
    "\n",
    "__0:__ hold, __1:__ draw"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "__1) Get initial State__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10, 6, 0)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "state = algorithms.numerize_state(env.reset())\n",
    "state"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "__2) Obtain best action from policy, given current state__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "action = np.argmax(policy[state])\n",
    "action#, policy[state + (action, )]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "__3) Obtain ( state, reward, finished ) after executing action from (2)__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((19, 6, 0), 1.0, True)"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tupl = env.step(action)\n",
    "state = algorithms.numerize_state(tupl[0])\n",
    "reward = tupl[1]\n",
    "finished = tupl[2]\n",
    "state, reward, finished"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
